perm filename B05.TEX[162,RWF] blob sn#750186 filedate 1984-06-06 generic text, type T, neo UTF8
\rm

\def\today{\ifcase\month\or
  January\or February\or March\or April\or May\or June\or
  July\or August\or September\or October\or November\or December\fi
  \space\number\day, \number\year}

\line{\sevenrm 162B05.tex[v1,rwf] \today\hfill}

\def\m@th{\mathsurround=0pt}

\def\eqaligntwo #1{\,\vcenter{\openup1\jot \m@th
	\ialign{\strut\hfil$\displaystyle{##}$&$\displaystyle{{}##}$\hfil&\quad
	\hfil$\displaystyle{##}$&$\displaystyle{{}##}$\hfil\crcr#1\crcr}}\,}

\noindent 
Prof. Robert W. Floyd 

\noindent 
CS 162 

\rightline{January 1984}

\noindent
{\bf Bye Bye, Binomial Coefficients}

\vskip .5in
 
Let $d↓p$ be a distribution with $d↓p(0) = q$, $d↓p(1) = p$, and $d↓p = 0$
elsewhere (a Bernoulli trial), where 
\line{$q=1-p$. Then $M(d↓p) = p$, $V(d↓p) = pq$. Taking convolutions, let
$b$ (the binomial distribution) be $d↓p\circ n$;} 
$$b(j) = p↑jq↑{n-j}{n\choose j},\quad M(b) = np, \quad V(b) = npq.$$  
Because $\displaystyle{
{b(j+1)\over b(j)} = {p\over q}\cdot{n-j\over j+1}}$,  $b$ has its maximum at
$j=pn=M(b)$.

Let's examine the hypothesis that the function $b$ is shaped like
$e↑{-Kx↑2}$ in the vicinity of $pn$.  If so, $${r(i)} = {b(pn+i)\over b(pn)}\approx
e↑{-Ki↑2},$$ so we would expect $$s(i) = {\sqrt{-\ln (r(i))}\over i}\approx\sqrt K$$ 
to be a constant.
In fact, since a normal distribution has $K = {1\over 2V},$ we would expect
$${\sqrt{-\ln (r(i))}\over i}\approx (2npq)↑{-1/2}.$$  
Suppose $n=50,$ $p=.4;$ the constant $\sqrt K$ should be  $24↑{-1/2} = .2041.$  

$$\eqaligntwo{
r(1)&={4\over 6}\cdot {30\over 21};&
s(1) &= .2209 \cr 
r(2)&= \left( {4\over 6}\right) ↑2 \cdot {30\cdot 29\over 21\cdot 22};&
s(2) &= .2110 \cr 
r(3)&= \left({4\over 6}\right) ↑3 \cdot
{30\cdot 29\cdot 28\over 21\cdot 22\cdot 23};&
s(3) &= .2073 \cr 
r(4)&= \left({4\over 6}\right) ↑4 \cdot {30\cdot 29\cdot 28\cdot 27\over
21\cdot 22\cdot 23\cdot 24};& s(4) &= .2053 \cr 
r(5)&= \left( {4\over 6}\right) ↑5 \cdot{30 \cdots 26\over 21 \cdots 25};&
s(5)&= .2040\cr}$$

\noindent
so, while the approximation is not exact, it shows promise.

\vfill\eject

For large $n$, and $p$ not too close to $0$ or $1$, we can use this approximation:
$$\eqalign{r(i)&= \left({p\over q}\right) ↑i {(qn)(qn-1)(qn-2)
\cdots (qn-(i-1))\over 
(pn+1)(pn+2)(pn+3)\cdots(pn+i)} \cr \noalign{\vskip 10pt} &= {1\cdot
\left(1-{1\over qn}\right) \left(1-{2\over qn}\right)
\cdots \left(1-{i-1\over qn}\right)\over
\left(1+{1\over pn}\right) \left(1+{2\over pn}\right)
\left(1+{3\over pn}\right) \cdots
\left(1+{i\over pn}\right) };\cr}$$   

\noindent
Using the Taylor expansion of the logs,

$$\eqalign{\ln(r(i))&= -{1\over qn} \sum↑{i-1}↓{j=1}j
-{1\over pn}\sum↑i↓{j=1} j
+ O\left(i↑3/n↑2\right)\cr
&= -{i(i-1)\over 2qn} - {i(i+1)\over 2pn} + O\left(i↑3/n↑2\right)\cr
&= -{i↑2\over 2npq} + O(i/n) + O\left(i↑3/n↑2\right) \cr}.$$
If $i\ll n↑{2/3}$, the approximation is good.  If $i$ is larger, both $b$ and the
approximation are very small, so the absolute error is small.  We conclude
$b(np+i) \approx C e↑{-i↑2/2V},$ with $C =(2πV)↑{-1/2}$ to make
$M↓0(b) = 1$. Therefore $b(np+i)\approx \sqrt{1\over{2\pi V}} e↑{-i↑2/2V}$

\noindent 
Examples: 

\noindent 
(1) $p = .5$; $n = 20$; $i = 0$.
$b(10) = 2↑{-20} {20\choose 10} = .1762$.
The approximation is $(2πV)↑{-1/2} = (10π)↑{-1/2} = .1784$.

\noindent 
(2) $p = .5$, $n = 50$, $i = 0$; $b(25) = .1122$.  The approximation is
$(25π)↑{-1/2} = .1128$.\par \noindent  (3) $p = .5$, $n = 20$,
$i = 5$; $b(15) = 2↑{-20}
{20\choose 5} = .01479$.  The approximation is $.1784 e↑{-25/10}
= .01464$.
\vfill\end